*** Prepare data for Mo, Holbein, Mitchell (2022)
*** Replication File for: 
*** 1) Figure S3
*** 2) Figure S4


*** OPEN DATASET AND SET PATHWAY
	*Given the proprietary nature of the TFA Admissions Data, the admissions data needed to replicate the study requires that researchers seeking to replicate the study sign a Non-Disclosure Agreement with TFA.
	*Please fill out and sign "TFA Confidentiality Agreement_MoConn.docx" and send to research@teachforamerica.org to gain access to "TFA_FinalAdmissions.dta"
	*Once access is granted to "TFA_FinalAdmissions.dta", you will be able to run "tfa_appendix.do", which will create the "TFA_merged.dta" dataset required to run this analysis.
	
	
	// Session setup: drop any data in memory and disable -more- pauses so the
	// script runs unattended.
	clear
	set more off
	// $class is the folder prefix used by -use- below; $output is the folder
	// prefix used by the -gr export- calls later in this file.
	// NOTE(review): "/output" is an absolute path from the filesystem root --
	// confirm that is intended rather than a folder relative to the project.
	global class "Additional Analyses/"
	global output "/output"
	// NOTE(review): after this -cd-, "$class/TFA_FinalAdmissions.dta" is resolved
	// relative to the new working directory (i.e. a nested
	// "Additional Analyses/" folder), whereas the survey file and the saved
	// "TFA_merged.dta" use the working directory itself -- confirm the layout.
	cd "Additional Analyses/" 
	
	// specify working directory here 
	
	// Load the admissions records and attach the raw survey data, matching one
	// row per personid. No keep()/assert() option is given, so unmatched rows
	// from either file are retained and -merge- adds its _merge indicator.
	use "$class/TFA_FinalAdmissions.dta", clear
	merge 1:1 personid using "TFA_survey_data_raw.dta"
	

	// DEFINE VARIABLES FOR MODEL
	
		*Create instrument: scoring above the cutoff
		* Stata treats missing values as larger than any number, so a bare
		* "zscore>=0" would also flag applicants with no score. The if-qualifier
		* keeps Z missing whenever zscore is missing.
		gen Z = (zscore >= 0) if !missing(zscore)
		label variable Z "scored above the cutoff"
		
		*Create treatment: matriculated in TFA
		* T stays missing for any matriculated4 value other than 0 or 1
		gen T=.
		replace T=1 if matriculated4==1
		replace T=0 if matriculated4==0
		label variable T "participated in TFA"
		
		*Identify those that started survey
		* 1 = finished or partially completed the survey; 0 = every other
		* record, including those with no survey status
		gen started = inlist(status2, "Finished Survey", "Partially Completed Survey")
	
		*identify those admitted
		* admit stays missing when admitted is neither "Y" nor "N"
		gen admit = 1 if admitted == "Y"
		replace admit = 0 if admitted == "N"

		*matriculation
		* plain copy of matriculated4 under a friendlier name
		gen matriculate = matriculated4
		
	// Demographic covariates (each built in a single expression)
		* Age rescaled as (age - 17) / 49
		generate age4 = (age - 17)/49

		* Age: take age2016 and fall back to age where age2016 is missing
		generate age3 = cond(age2016 == ., age, age2016)
		format age3 %2.0f

		* First-generation student: dem9 below 4 -> 1, dem9 of 4 or 5 -> 0,
		* anything else (including missing) stays missing
		generate first_gen = cond(dem9 < 4, 1, cond(inlist(dem9, 4, 5), 0, .))

		* GPA: keep only values in (0, 4]; everything else is set to missing
		generate gpa_clean = cumulativegpa if cumulativegpa > 0 & cumulativegpa <= 4

		* White: 1 when dem4_IN is 1, 0 for larger non-missing codes
		generate white = (dem4_IN == 1) if dem4_IN >= 1 & dem4_IN != .

		* Social class: one indicator (class1, class2, ...) per dem16 category
		tabulate dem16, generate(class)

		* Female: survey answer (dem3 - 1) first, application record as fallback
		generate female = dem3 - 1
		generate female_app = (gender == "FEMALE") if inlist(gender, "FEMALE", "MALE")
		generate female3 = cond(female == ., female_app, female)

		* Religiosity: dem11 of 8-10 -> 0; dem11 of 7 or less, or 11 -> 1
		generate religiosity = cond(dem11 <= 7 | dem11 == 11, 1, cond(inrange(dem11, 8, 10), 0, .))
		
	// Variable labels
		*Demographics
		label variable female3 "Female"
		label variable age4 "Age (recoded 0 to 1)"
		label variable age "Raw Age"
		label variable white "White"
		label variable gpa_clean "Cumulative College GPA"
		label variable first_gen "First Generation College Student"
		label variable pell "Pell Grant Recipient"
		* class1-class5 come from "tab dem16, gen(class)"; the labels below
		* assume dem16 is coded from upper (1) to lower (5) class -- TODO confirm
		label variable class1 "Upper Class"
		label variable class2 "Upper Middle Class"
		label variable class3 "Lower Middle Class"
		label variable class4 "Upper Lower Class"
		label variable class5 "Lower Class"
		label variable religiosity "Religiosity"
		label variable age2016 "Age"
		* label text corrected (was misspelled) so it matches the label on T
		label variable matriculate "participated in TFA"
	
		// save the merged, cleaned data to the working directory,
		// overwriting any existing copy
		save "TFA_merged.dta", replace 


	set more off
	set scheme plotplain

	// Figure S1: response-rate estimates

		* RD model with survey completion (finished) as the outcome,
		* T as the treatment and zscore as the running variable (cutoff at 0)
		rd finished T zscore, z0(0) cov(app_year1-app_year9) cluster(zscore) kernel(triangular)

		* note: see .R file to generate Figure S1

	// Figure S2: first-stage estimates

		* bin the admission score in steps of .025 between -.5 and .5
		egen zscore_bin = cut(zscore), at(-.5(.025).5)

		* matriculation rate within each bin, survey starters only
		* (an extra restriction, appyear<2014 & appyear!=2008, is left disabled)
		bysort zscore_bin: egen matric_rate = mean(T) if started == 1

		* size of the first stage: jump in T at the cutoff among survey starters
		rd T zscore if started == 1, z0(0) cov(app_year1-app_year9) cluster(zscore) kernel(triangular)

		* re-estimate with ivregress to obtain the first-stage F statistic
			* bandwidth from the rd fit above, rounded to two decimals
			local bandwidth = round(e(w), 0.01)

			* triangular kernel weight for ivregress
			generate weight = 1 - abs(zscore)/`bandwidth'

			* same window as rd: scores strictly inside +/- bandwidth, starters only
			ivregress 2sls finished (T = Z) zscore i.Z#c.zscore i.appyear [pweight = weight] ///
				if abs(zscore) < `bandwidth' & started == 1, first vce(cluster zscore)

			* F statistic on the excluded instrument
			estat firststage

		* note: see .R file to generate Figure S2
		
	// Figure S3: RD estimates for pre-treatment characteristics

		* covariates to check, kept in a global
		global control female3  white ///
			first_gen pell class5 class4 class3 class2 class1 religiosity age4

		* one RD model per covariate (survey starters only); each result is
		* stored under the covariate's own name for coefplot
		* NOTE(review): the year controls here stop at app_year7, while the
		* Figure S1/S2 models use app_year1-app_year9 -- confirm this is intended
		eststo clear
		foreach covariate of global control {
			rd `covariate' T zscore if started == 1, z0(0) cov(app_year1-app_year7) cluster(zscore) kernel(triangular)
			eststo `covariate'
		}

		* plot the "numer" coefficient of every stored model, one row per covariate
		coefplot female3 || age4 || white || first_gen || pell || class5 || class4 || class3 || class2 || class1 || religiosity, ///
			labels legend(off) ///
			bylabels("Female" "Age" "White" "First generation college student" "Pell grant recipient" "Lower class" "Upper lower class" "Lower middle class" "Upper middle class" "Upper class" "Religiosity") ///
			keep(numer) bycoefs xline(0) ///
			graphregion(col(white)) bgcol(white) ///
			msymbol(circle_hollow) mcolor(black) ciopts(color(black)) ///
			ysize(4) xsize(4)

		graph export "$output/coefficients.eps", as(eps) replace
		
	// Figure S.4 Admission score distribution
		* Applicants whose zscore lies strictly between -.5 and .5,
		* shown as counts in 40 bins with a reference line at the cutoff (0)
		histogram zscore if abs(zscore) < .5, frequency bin(40) ///
				xlabel(-.5 (.5) .5) xline(0) graphregion(color(white)) ///
				xtitle(Admission Score) ytitle(Number of Applicants) title(Distribution of Admission Scores)

		graph export "$output/score_distribution.eps", replace
		
	// Figure S15: Careers

	** CAREER NON-ADMIT
	* One frequency table per career item (career9a1n_2_1 to career9a1n_2_3),
	* restricted to application years before 2014, a non-missing zscore and
	* matriculated4 not equal to 1 (which also keeps missing matriculated4).
	forvalues item = 1/3 {
		tab career9a1n_2_`item' if appyear<2014 & zscore !=. & matriculated4 != 1
	}
	* Output imported into FigureS15.xlsx to generate the three figures.


			
